# !pip install git+https://github.com/scverse/squidpy.git --user
# import cormerant as cmt
import numpy as np
import pandas as pd
import squidpy as sq
from copy import deepcopy
from scipy.cluster import hierarchy as sch
import scanpy as sc
from matplotlib import pyplot as plt
import os
import pickle
%matplotlib inline
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\geopandas\_compat.py:123: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow. warnings.warn( C:\Users\DouglasHannumJr\anaconda3\lib\site-packages\spatialdata\__init__.py:9: UserWarning: Geopandas was set to use PyGEOS, changing to shapely 2.0 with: geopandas.options.use_pygeos = True If you intended to use PyGEOS, set the option to False. _check_geopandas_using_shapely()
Going to follow the example workflow provided by squidpy to run receptor-ligand analysis with a re-implementation of cellphonedb.
directory = '/Users/DouglasHannumJr/Desktop/s3_bucket_data/s3_bucket_data/'
os.chdir(directory)
os.listdir()
['anndata.h5ad', 'anndata.pickle', 'cell_boundaries.parquet', 'cell_by_gene.csv', 'cell_metadata.csv', 'images', 'neuro_panel_cluster_annotation.csv', 'partitioned_transcripts.csv', 'spat.Rds']
# import inspect
# lines = inspect.getsource(sq.read.vizgen)
# print(lines)
adata = sq.read.vizgen(
directory,
counts_file = 'cell_by_gene.csv',
meta_file = 'cell_metadata.csv')
adata
AnnData object with n_obs × n_vars = 78273 × 483
obs: 'fov', 'volume', 'min_x', 'min_y', 'max_x', 'max_y', 'anisotropy', 'transcript_count', 'perimeter_area_ratio', 'solidity'
uns: 'spatial'
obsm: 'blank_genes', 'spatial'
adata.var_names_make_unique()
sc.pp.filter_cells(adata, min_counts = 100)
sc.pp.filter_genes(adata, min_cells = 10)
adata
AnnData object with n_obs × n_vars = 67729 × 483
obs: 'fov', 'volume', 'min_x', 'min_y', 'max_x', 'max_y', 'anisotropy', 'transcript_count', 'perimeter_area_ratio', 'solidity', 'n_counts'
var: 'n_cells'
uns: 'spatial'
obsm: 'blank_genes', 'spatial'
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.scale(adata, max_value = 10)
resolution = .5
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=20)
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution=resolution)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\distances.py:1063: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit() C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\distances.py:1071: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit() C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\distances.py:1086: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit() C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\umap\umap_.py:660: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit()
sc.set_figure_params(figsize=(10,10))
sc.pl.umap(adata, color = ['leiden'], size = 5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sq.pl.spatial_scatter(
adata,
shape = None, color = 'leiden', size = 0.5,
library_id = 'spatial', figsize=(10,10))
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\squidpy\pl\_spatial_utils.py:956: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored _cax = scatter(
markers = pd.read_csv(directory + 'neuro_panel_cluster_annotation.csv', index_col=0)
markers.index = markers.gene
markers.head()
| gene | Class | ClusterName | Description | Neurotransmitter | Region | |
|---|---|---|---|---|---|---|
| gene | ||||||
| Igf2 | Igf2 | Vascular | VECA | Vascular endothelial cells, arterial | NaN | CNS |
| Ngfr | Ngfr | Neurons | SYNOR4 | Noradrenergic erector muscle neurons | Noradrenaline | Sympathetic ganglion |
| Ccnd2 | Ccnd2 | Neurons | SZNBL | Neuronal intermidate progenitor cells | NaN | Striatum dorsal, Striatum ventral, Dentate gyrus |
| Th | Th | Neurons | SYNOR4 | Noradrenergic erector muscle neurons | Noradrenaline | Sympathetic ganglion |
| Lhx2 | Lhx2 | Neurons | TEGLU13 | Excitatory neurons, cerebral cortex | Glutamate (VGLUT1,VGLUT2), Nitric oxide | Cortex |
adata.var = pd.merge(adata.var, markers, how = 'left', left_index=True, right_index=True)
adata.var.Class.value_counts()
Class Neurons 106 Vascular 13 Ependymal 5 Oligos 5 Astrocytes 5 Immune 4 PeripheralGlia 3 Name: count, dtype: int64
markers.Class.value_counts()
Class Neurons 388 Vascular 30 Oligos 27 Ependymal 18 PeripheralGlia 18 Astrocytes 11 Immune 8 Name: count, dtype: int64
%%capture
sc.tl.rank_genes_groups(adata, 'leiden', method = 'wilcoxon')
%%capture
top_markers = pd.DataFrame()
for cluster in adata.obs['leiden'].unique():
markers_ = adata.uns['rank_genes_groups']['names'][cluster][:10]
scores = adata.uns['rank_genes_groups']['scores'][cluster][:10]
top_markers = pd.concat([top_markers,pd.DataFrame({'cluster': [cluster]*10,
'gene': markers_,
'scores': scores})])
markers = markers.reset_index(drop = True)
top_markers = pd.merge(top_markers, markers, on = 'gene', how = 'left')
marker_table = pd.crosstab(top_markers.cluster, top_markers.Class)
labels = marker_table.idxmax(axis = 1)
labels.index = labels.index.astype(dtype = 'int')
labels = labels.sort_index()
labels.name = 'cluster_type'
adata.obs['cluster'] = adata.obs.leiden.astype('int')
adata.obs = pd.merge(adata.obs, labels, how = 'left', on = 'cluster')
adata.obs['cluster'] = adata.obs.leiden.astype('str') + '-' + adata.obs.cluster_type
sc.pl.umap(adata, color = 'cluster', size = 5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sq.pl.spatial_scatter(
adata,
shape = None, color = 'cluster', size = 0.5,
library_id = 'spatial', figsize=(10,10))
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\squidpy\pl\_spatial_utils.py:956: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored _cax = scatter(
sc.pl.umap(adata, color = 'cluster_type', size = 5)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\scanpy\plotting\_tools\scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sq.pl.spatial_scatter(
adata,
shape = None, color = 'cluster_type', size = 0.5,
library_id = 'spatial', figsize=(10,10))
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\squidpy\pl\_spatial_utils.py:956: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored _cax = scatter(
sq.gr.spatial_neighbors(adata, coord_type = 'generic', spatial_key = 'spatial')
sq.gr.nhood_enrichment(adata, cluster_key = 'cluster')
0%| | 0/1000 [00:00<?, ?/s]
os.listdir()
['anndata.h5ad', 'anndata.pickle', 'cell_boundaries.parquet', 'cell_by_gene.csv', 'cell_metadata.csv', 'images', 'neuro_panel_cluster_annotation.csv', 'partitioned_transcripts.csv', 'spat.Rds']
with open('anndata.pickle', 'wb') as f:
pickle.dump(adata, f)
# import cormerant as cmt
import numpy as np
import pandas as pd
import squidpy as sq
from copy import deepcopy
from scipy.cluster import hierarchy as sch
import scanpy as sc
from matplotlib import pyplot as plt
import os
import pickle
%matplotlib inline
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\geopandas\_compat.py:123: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow. warnings.warn( C:\Users\DouglasHannumJr\anaconda3\lib\site-packages\spatialdata\__init__.py:9: UserWarning: Geopandas was set to use PyGEOS, changing to shapely 2.0 with: geopandas.options.use_pygeos = True If you intended to use PyGEOS, set the option to False. _check_geopandas_using_shapely()
Going to follow the example workflow provided by squidpy to run receptor-ligand analysis with a re-implementation of cellphonedb.
directory = '/Users/DouglasHannumJr/Desktop/s3_bucket_data/s3_bucket_data/'
os.chdir(directory)
os.listdir()
['anndata.h5ad', 'anndata.pickle', 'cell_boundaries.parquet', 'cell_by_gene.csv', 'cell_metadata.csv', 'images', 'neuro_panel_cluster_annotation.csv', 'partitioned_transcripts.csv', 'spat.Rds']
with open('anndata.pickle', 'rb') as f:
adata = pickle.load(f)
Receptor Ligand Stuff
adata
AnnData object with n_obs × n_vars = 67729 × 483
obs: 'fov', 'volume', 'min_x', 'min_y', 'max_x', 'max_y', 'anisotropy', 'transcript_count', 'perimeter_area_ratio', 'solidity', 'n_counts', 'leiden', 'cluster', 'cluster_type'
var: 'n_cells', 'mean', 'std', 'gene', 'Class', 'ClusterName', 'Description', 'Neurotransmitter', 'Region'
uns: 'spatial', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'rank_genes_groups', 'cluster_colors', 'cluster_type_colors', 'spatial_neighbors', 'cluster_nhood_enrichment'
obsm: 'blank_genes', 'spatial', 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities', 'spatial_connectivities', 'spatial_distances'
adata.obs.head()
| fov | volume | min_x | min_y | max_x | max_y | anisotropy | transcript_count | perimeter_area_ratio | solidity | n_counts | leiden | cluster | cluster_type | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | 926.843300 | 7651.597166 | 3591.773372 | 7667.818766 | 3608.102973 | 1.380044 | 226 | 0.502872 | 3.257825 | 226.0 | 3 | 3-Astrocytes | Astrocytes |
| 1 | NaN | 1068.359528 | 7822.885170 | 3682.169375 | 7835.639971 | 3693.314975 | 1.307624 | 204 | 0.401261 | 5.802649 | 204.0 | 3 | 3-Astrocytes | Astrocytes |
| 2 | NaN | 519.714508 | 7799.557170 | 3745.457376 | 7809.082770 | 3757.790977 | 1.437755 | 158 | 0.628701 | 4.245162 | 158.0 | 3 | 3-Astrocytes | Astrocytes |
| 3 | NaN | 560.521831 | 7696.201167 | 3708.953375 | 7704.106767 | 3720.422976 | 1.527251 | 115 | 0.569795 | 5.278365 | 114.0 | 4 | 4-Vascular | Vascular |
| 4 | NaN | 1975.395430 | 7661.317166 | 3632.381373 | 7679.806767 | 3650.654974 | 1.200734 | 770 | 0.304745 | 4.898135 | 764.0 | 8 | 8-Neurons | Neurons |
adata_sub = adata[adata.obs['min_x'] > 5000]
adata_sub
View of AnnData object with n_obs × n_vars = 29669 × 483
obs: 'fov', 'volume', 'min_x', 'min_y', 'max_x', 'max_y', 'anisotropy', 'transcript_count', 'perimeter_area_ratio', 'solidity', 'n_counts', 'leiden', 'cluster', 'cluster_type'
var: 'n_cells', 'mean', 'std', 'gene', 'Class', 'ClusterName', 'Description', 'Neurotransmitter', 'Region'
uns: 'spatial', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'rank_genes_groups', 'cluster_colors', 'cluster_type_colors', 'spatial_neighbors', 'cluster_nhood_enrichment'
obsm: 'blank_genes', 'spatial', 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities', 'spatial_connectivities', 'spatial_distances'
res = sq.gr.ligrec(
adata_sub,
n_perms = 100,
cluster_key = 'cluster',
copy = True,
use_raw = False,
interaction_params={'resources':'CellPhoneDB'},
transmitter_params ={'categories':'ligand'},
receiver_params = {'categories':'receptor'})
0%| | 0/100 [00:00<?, ?permutation/s]
res['means'].shape
(107, 441)
res['means'].sum(axis=1)
source target
KIT PDGFRB 40.567856
NRP1 PDGFRB 53.153027
CXCL12 EPHB2 54.514167
FLT4 KDR 21.601169
NRP1 KDR 50.795039
...
SLC17A7 GRM6 39.142934
SLC17A8 GRM6 3.817010
SLC17A6 GRIN2B 45.490812
SLC17A7 GRIN2B 75.827596
SLC17A8 GRIN2B 14.858360
Length: 107, dtype: float64
res['pvalues'].head()
| cluster_1 | 0-Oligos | ... | 9-Neurons | |||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| cluster_2 | 0-Oligos | 1-Neurons | 10-Neurons | 11-Neurons | 12-Immune | 13-Oligos | 14-Vascular | 15-Vascular | 16-Neurons | 17-Neurons | ... | 19-Neurons | 2-Neurons | 20-Vascular | 3-Astrocytes | 4-Vascular | 5-Neurons | 6-Astrocytes | 7-Neurons | 8-Neurons | 9-Neurons | |
| source | target | |||||||||||||||||||||
| KIT | PDGFRB | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.0 | NaN | NaN | NaN |
| NRP1 | PDGFRB | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.0 | NaN | NaN | NaN |
| CXCL12 | EPHB2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| FLT4 | KDR | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NRP1 | KDR | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.0 | NaN | 0.0 | NaN | NaN | NaN | NaN | NaN |
5 rows × 441 columns
res['metadata'].head()
| aspect_intercell_source | aspect_intercell_target | category_intercell_source | category_intercell_target | category_source_intercell_source | category_source_intercell_target | consensus_direction | consensus_inhibition | consensus_score_intercell_source | consensus_score_intercell_target | ... | scope_intercell_source | scope_intercell_target | secreted_intercell_source | secreted_intercell_target | sources | transmitter_intercell_source | transmitter_intercell_target | type | uniprot_intercell_source | uniprot_intercell_target | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| source | target | |||||||||||||||||||||
| KIT | PDGFRB | functional | functional | ligand | receptor | resource_specific | resource_specific | False | False | 1 | 21 | ... | generic | generic | False | False | HPRD_MIMP;KEA;MIMP;NetworKIN_KEA;PhosphoSite_M... | True | False | post_translational | P10721 | P09619 |
| NRP1 | PDGFRB | functional | functional | ligand | receptor | resource_specific | resource_specific | False | False | 2 | 21 | ... | generic | generic | True | False | Cellinker | True | False | post_translational | O14786 | P09619 |
| CXCL12 | EPHB2 | functional | functional | ligand | receptor | resource_specific | resource_specific | True | False | 21 | 20 | ... | generic | generic | True | False | Wang | True | False | post_translational | P48061 | P29323 |
| FLT4 | KDR | functional | functional | ligand | receptor | resource_specific | resource_specific | False | False | 1 | 23 | ... | generic | generic | True | True | HPRD;IntAct;Li2012;PhosphoPoint;SPIKE_LC | True | False | post_translational | P35916 | P35968 |
| NRP1 | KDR | functional | functional | ligand | receptor | resource_specific | resource_specific | True | False | 2 | 23 | ... | generic | generic | True | True | BioGRID;CellChatDB-cofactors;HPRD;SPIKE_LC;Wang | True | False | post_translational | O14786 | P35968 |
5 rows × 41 columns
sq.pl.ligrec(res, source_groups = '0-Oligos', alpha = 0.005)
C:\Users\DouglasHannumJr\AppData\Roaming\Python\Python310\site-packages\scanpy\plotting\_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored dot_ax.scatter(x, y, **kwds)